In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import explained_variance_score # dispersion(spreading) of error in dataset
from sklearn.metrics import confusion_matrix
import warnings
warnings.filterwarnings("ignore")
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
In [2]:
dataset=pd.read_csv("flatrentwithprice.csv")
dataset.head()
Out[2]:
size distance floor age facing flat_price flat_rent
0 870 1.5 1 2 1 2300000 9000
1 900 2.4 2 5 1 3000000 12000
2 1050 2.0 2 10 2 5000000 15000
3 950 3.0 3 20 3 1700000 7000
4 750 2.0 1 7 4 2700000 10000
In [3]:
dataset=dataset.drop(['age','distance','floor','facing','flat_rent'],axis=1)
In [4]:
dataset.head()
Out[4]:
size flat_price
0 870 2300000
1 900 3000000
2 1050 5000000
3 950 1700000
4 750 2700000
In [5]:
# X(Independent variables) and y(target variables) 
X = dataset.iloc[:,0:].values 
y = dataset.iloc[:,1].values 
In [6]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.4,random_state=42) # test size means size of the set test.
In [7]:
# Decision Tree Classifier
tr_regressor=DecisionTreeClassifier(random_state=42)
tr_regressor.fit(X_train,y_train)
tr_regressor.score(X_test,y_test)
pred_tr=tr_regressor.predict(X_test)
decision_score=tr_regressor.score(X_test,y_test)
expl_tr=explained_variance_score(pred_tr,y_test)
In [8]:
# Random Forest Regression Model
rf_regressor=RandomForestClassifier(random_state=42)
rf_regressor.fit(X_train,y_train)
rf_regressor.score(X_test,y_test)
pred_rf=rf_regressor.predict(X_test)
rf_score=rf_regressor.score(X_test,y_test)
expl_rf=explained_variance_score(pred_rf,y_test)
In [9]:
print("The Score of the Decision Tree Classifier:",round(tr_regressor.score(X_test,y_test)*100))
print("The score of the Random Forest  Classifier:",round(rf_regressor.score(X_test,y_test)*100))
The Score of the Decision Tree Classifier: 72
The score of the Random Forest  Classifier: 35
In [10]:
# Confusion Matrix for Decision Tree Classifier.
conf_mat=confusion_matrix(y_test,pred_tr)
print(conf_mat)
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]]
In [11]:
# Confusion Matrix for Random Forest Classifier.
conf_mat=confusion_matrix(y_test,pred_rf)
print(conf_mat)
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 2 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]]
In [12]:
# R2 score ,Mean Square Error , Mean Absolute Error,f1 score,Recall Score,Precision Score in Decision Tree Classifier.
mse=mean_squared_error(y_test,pred_tr)
mae=mean_absolute_error(y_test,pred_tr)
score=r2_score(y_test,pred_tr)
f1score=f1_score(y_test,pred_tr,average='micro')#Calculate metrics globally by counting the total true positives, false negatives and false positives.
f1sc=f1_score(y_test,pred_tr,average='macro') #Calculate metrics for each label, and find their unweighted mean. 
                                                #This does not take label imbalance into account.
recall=recall_score(y_test, pred_tr, average='macro')
#Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account.
re=recall_score(y_test, pred_tr, average='micro') #Calculate metrics globally by counting the total true positives, false negatives and false positive
rmse=np.sqrt(mse)
preci=precision_score(y_test, pred_tr, average='macro')
pr=precision_score(y_test, pred_tr, average='micro')
print('the mse is ',mse)
print('mae is ',mae)
print('score is ',score)
print('rmse',rmse)
print("The f1 score  in micro:",f1score)
print("The F1 score  in macro: ",f1sc)
print("The recall score  in macro: ",recall)
print("The recall score in micro:",re)
print("The precision score in macro:",preci)
print("The precision score in micro",pr)
the mse is  13250000000.0
mae is  52500.0
score is  0.9923602227067154
rmse 115108.64433221339
The f1 score  in micro: 0.7250000000000001
The F1 score  in macro:  0.5785714285714285
The recall score  in macro:  0.6071428571428571
The recall score in micro: 0.725
The precision score in macro: 0.5714285714285714
The precision score in micro 0.725
In [13]:
# R2 score ,Mean Square Error , Mean Absolute Error,Fi score , Recall Score in Random Forest Classifier .
mse=mean_squared_error(y_test,pred_rf)
mae=mean_absolute_error(y_test,pred_rf)
score=r2_score(y_test,pred_rf)
rmse=np.sqrt(mse)
f1score=f1_score(y_test,pred_rf,average='micro')
f1sc=f1_score(y_test,pred_rf,average='macro')
recall=recall_score(y_test, pred_rf, average='macro')
re=recall_score(y_test, pred_rf, average='micro')
preci=precision_score(y_test, pred_rf, average='macro')
pr=precision_score(y_test, pred_rf, average='micro')
print('the mse is ',mse)
print('mae is ',mae)
print('score is ',score)
print('rmse',rmse)
print("The f1 score is in micro:",f1score)
print("The F1 score is in macro: ",f1sc)
print("The recall score  in macro: ",recall)
print("The recall score in micro:",re)
print("The precision score in macro:",preci)
print("The precision score in micro",pr)
the mse is  368500000000.0
mae is  310000.0
score is  0.7875277032018595
rmse 607042.0084310476
The f1 score is in micro: 0.35
The F1 score is in macro:  0.24666666666666667
The recall score  in macro:  0.27777777777777773
The recall score in micro: 0.35
The precision score in macro: 0.27888888888888896
The precision score in micro 0.35
In [14]:
tab_formate=pd.DataFrame({'Models':["Decision Tree Classifier","Random Forest Classifier"],
                           'Score':[ decision_score, rf_score],"Explained_Variance_Score":[ expl_tr, expl_rf]})
tab_formate.sort_values(by="Score",ascending=False)
Out[14]:
Models Score Explained_Variance_Score
0 Decision Tree Classifier 0.725 0.991977
1 Random Forest Classifier 0.350 0.777980
In [15]:
import matplotlib.pyplot as plt
import csv

x = []
y = []

with open('flatrentwithprice.csv','r') as fl:
	plots = csv.reader(fl, delimiter = ',')
	
	for row in plots:
		x.append(row[0])
		y.append(row[5])

plt.bar(x, y, color = 'maroon', width = 0.6, label = "Flat_Price")
plt.xticks(rotation=55)
plt.xlabel("Size")
plt.ylabel('Flat_Price')
plt.title('Size Vs Flat_Price')
plt.legend()
plt.show()
In [16]:
#scatter_plotting.py

import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('seaborn')  # to get seaborn scatter plot

# read the csv file to extract data

data = pd.read_csv('flatrentwithprice.csv')
size = data['size']
flat_price = data['flat_price']

plt.scatter(size, flat_price, s=100, alpha=0.6, edgecolor='black', linewidth=1)

plt.title('Size vs Flat_Price')
plt.xlabel('Flat_price')
plt.ylabel('Size(square-foot)')

plt.tight_layout()
plt.show()
data.head()
Out[16]:
size distance floor age facing flat_price flat_rent
0 870 1.5 1 2 1 2300000 9000
1 900 2.4 2 5 1 3000000 12000
2 1050 2.0 2 10 2 5000000 15000
3 950 3.0 3 20 3 1700000 7000
4 750 2.0 1 7 4 2700000 10000
In [17]:
import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_csv("flatrentwithprice.csv")
fig = plt.figure()
plt.plot(data['size'],data['flat_price'])
fig.autofmt_xdate()
plt.show()
In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import matplotlib.pyplot as plt
In [19]:
dataset=pd.read_csv("flatrentwithprice.csv")
dataset.head()
Out[19]:
size distance floor age facing flat_price flat_rent
0 870 1.5 1 2 1 2300000 9000
1 900 2.4 2 5 1 3000000 12000
2 1050 2.0 2 10 2 5000000 15000
3 950 3.0 3 20 3 1700000 7000
4 750 2.0 1 7 4 2700000 10000
In [20]:
dataset=dataset.drop(['size','floor','age','facing','flat_rent'],axis=1)
In [21]:
dataset.head()
Out[21]:
distance flat_price
0 1.5 2300000
1 2.4 3000000
2 2.0 5000000
3 3.0 1700000
4 2.0 2700000
In [22]:
X = dataset.iloc[:,0:].values
y=dataset.iloc[:,1].values
In [23]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)
In [24]:
# Decision Tree Classifier
tr_regressor=DecisionTreeClassifier(random_state=42)
tr_regressor.fit(X_train,y_train)
tr_regressor.score(X_test,y_test)
pred_tr=tr_regressor.predict(X_test)
decision_score=tr_regressor.score(X_test,y_test)
expl_tr=explained_variance_score(pred_tr,y_test)
In [25]:
# Random Forest Classifier
rf_regressor=RandomForestClassifier(random_state=42)
rf_regressor.fit(X_train,y_train)
rf_regressor.score(X_test,y_test)
pred_rf=rf_regressor.predict(X_test)
rf_score=rf_regressor.score(X_test,y_test)
expl_rf=explained_variance_score(pred_rf,y_test)
In [26]:
print("The Score of the Decision Tree Classifier between distance and flat_price:",round(tr_regressor.score(X_test,y_test)*100))
print("The score of the Random Forest  Classifier between distance and flat_price:",round(rf_regressor.score(X_test,y_test)*100))
The Score of the Decision Tree Classifier between distance and flat_price: 63
The score of the Random Forest  Classifier between distance and flat_price: 30
In [27]:
tab_formate=pd.DataFrame({'Models':["Decision Tree Classifier","Random Forest Classifier"],
                           'Score':[ decision_score, rf_score],"Explained_Variance_Score":[ expl_tr, expl_rf]})
tab_formate.sort_values(by="Score",ascending=False)
Out[27]:
Models Score Explained_Variance_Score
0 Decision Tree Classifier 0.633333 0.992595
1 Random Forest Classifier 0.300000 0.967750
In [28]:
dataset=pd.read_csv("flatrentwithprice.csv")
x=dataset[["size"]]
y=dataset[["flat_price"]]
In [29]:
data=pd.read_csv('flatrentwithprice.csv')
data.head()
Out[29]:
size distance floor age facing flat_price flat_rent
0 870 1.5 1 2 1 2300000 9000
1 900 2.4 2 5 1 3000000 12000
2 1050 2.0 2 10 2 5000000 15000
3 950 3.0 3 20 3 1700000 7000
4 750 2.0 1 7 4 2700000 10000
In [30]:
import matplotlib.pyplot as plt
import csv

x = []
y = []

with open('flatrentwithprice.csv','r') as fl:
	plots = csv.reader(fl, delimiter = ',')
	
	for row in plots:
		x.append(row[1])
		y.append(row[5])

plt.bar(x, y, color = 'blue', width = 0.4, label = "Flat_Price")
plt.xticks(rotation=55)
plt.xlabel("Distance")
plt.ylabel('Flat_Price')
plt.title('Distance Vs Flat_Price')
plt.legend()
plt.show()
In [31]:
#scatter_plotting.py

import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('seaborn')  # to get seaborn scatter plot

# read the csv file to extract data

data = pd.read_csv('flatrentwithprice.csv')
distance = data['distance']
flat_price = data['flat_price']

plt.scatter(flat_price, distance, s=100, alpha=0.6, edgecolor='black', linewidth=1)

plt.title('Distance vs Flat_Price')
plt.xlabel('Flat_price')
plt.ylabel('Distance')
plt.tight_layout()
plt.show()
In [32]:
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
fd=pd.read_csv("flatrentwithprice.csv")
X =  pd.DataFrame(sc_X.fit_transform(fd.drop(["flat_rent"],axis = 1)),
        columns=['size', 'distance', 'floor', 'age', 'facing', 'flat_price'])
In [33]:
y=fd['flat_rent']
In [34]:
from sklearn.model_selection import train_test_split                                                                  
X_train,X_test,Y_train,Y_test = train_test_split(X,y,test_size=.2,random_state=42)
In [35]:
corr=X_train.corr()
In [36]:
sns.heatmap(corr,annot=True,cmap='Blues')
Out[36]:
<AxesSubplot:>
In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
In [38]:
data = pd.read_csv("flatrentwithprice.csv")
print(data.head())
   size  distance  floor  age  facing  flat_price  flat_rent
0   870       1.5      1    2       1     2300000       9000
1   900       2.4      2    5       1     3000000      12000
2  1050       2.0      2   10       2     5000000      15000
3   950       3.0      3   20       3     1700000       7000
4   750       2.0      1    7       4     2700000      10000
In [39]:
print(data.describe())
              size    distance       floor         age     facing  \
count   100.000000  100.000000  100.000000  100.000000  100.00000   
mean    825.120000    2.669000    4.270000   12.270000    2.44000   
std     184.569457    1.347553    2.662326    6.731285    1.16619   
min     500.000000    1.000000    1.000000    1.000000    1.00000   
25%     680.000000    2.000000    2.000000    7.750000    1.00000   
50%     840.000000    2.000000    4.000000   12.000000    3.00000   
75%     950.000000    4.000000    6.000000   16.250000    3.00000   
max    1250.000000    6.000000   12.000000   30.000000    4.00000   

         flat_price     flat_rent  
count  1.000000e+02    100.000000  
mean   3.015000e+06   9018.000000  
std    1.231520e+06   2776.720688  
min    1.200000e+06   4000.000000  
25%    2.100000e+06   7000.000000  
50%    2.650000e+06   8000.000000  
75%    3.800000e+06  10000.000000  
max    5.900000e+06  18000.000000  
In [40]:
print("This is for flat_Price \n")
print(f"Mean flat_price: {data.flat_price.mean()}")
print(f"Median flat_price: {data.flat_price.median()}")
print(f"Highest flat_price: {data.flat_price.max()}")
print(f"Lowest flat_price: {data.flat_price.min()}")
# to finding the flat_rent
print("\n This is for Flat_Rent\n")
print(f"Mean flat_rent: {data.flat_rent.mean()}")
print(f"Median flat_rent: {data.flat_rent.median()}")
print(f"Highest flat_rent: {data.flat_rent.max()}")
print(f"Lowest flat_rent: {data.flat_rent.min()}")
data.head()
This is for flat_Price 

Mean flat_price: 3015000.0
Median flat_price: 2650000.0
Highest flat_price: 5900000
Lowest flat_price: 1200000

 This is for Flat_Rent

Mean flat_rent: 9018.0
Median flat_rent: 8000.0
Highest flat_rent: 18000
Lowest flat_rent: 4000
Out[40]:
size distance floor age facing flat_price flat_rent
0 870 1.5 1 2 1 2300000 9000
1 900 2.4 2 5 1 3000000 12000
2 1050 2.0 2 10 2 5000000 15000
3 950 3.0 3 20 3 1700000 7000
4 750 2.0 1 7 4 2700000 10000
In [41]:
figure = px.bar(data, x=data["flat_price"], 
                y = data["size"], 
                color = data["facing"],
            title="Size , flat_price and Facing")
figure.show()
In [42]:
figure = px.bar(data, x=data["flat_price"], 
                y = data["size"], 
                color = data["floor"],
            title="Size , Flat_price, Floor")
figure.show()
data.head()
Out[42]:
size distance floor age facing flat_price flat_rent
0 870 1.5 1 2 1 2300000 9000
1 900 2.4 2 5 1 3000000 12000
2 1050 2.0 2 10 2 5000000 15000
3 950 3.0 3 20 3 1700000 7000
4 750 2.0 1 7 4 2700000 10000
In [43]:
figure = px.bar(data, x=data["flat_rent"], 
                y = data["size"], 
                color = data["facing"],
            title="Size ,Flat_rent and Facing")
figure.show()
data.head()
Out[43]:
size distance floor age facing flat_price flat_rent
0 870 1.5 1 2 1 2300000 9000
1 900 2.4 2 5 1 3000000 12000
2 1050 2.0 2 10 2 5000000 15000
3 950 3.0 3 20 3 1700000 7000
4 750 2.0 1 7 4 2700000 10000
In [44]:
# Now we are going to splitting the data.
from sklearn.model_selection import train_test_split
x=np.array(data[["size","distance","floor","age","facing"]])
y=np.array(data[["flat_price"]])
In [45]:
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.4,random_state=42)
In [46]:
from keras.models import Sequential
from keras.layers import Dense, LSTM
model = Sequential()
model.add(LSTM(128, return_sequences=True, 
               input_shape= (xtrain.shape[1], 1)))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm (LSTM)                 (None, 5, 128)            66560     
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 25)                1625      
                                                                 
 dense_1 (Dense)             (None, 1)                 26        
                                                                 
=================================================================
Total params: 117,619
Trainable params: 117,619
Non-trainable params: 0
_________________________________________________________________
In [47]:
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(xtrain, ytrain, batch_size=1, epochs=21)
Epoch 1/21
60/60 [==============================] - 4s 5ms/step - loss: 10645548302336.0000
Epoch 2/21
60/60 [==============================] - 0s 5ms/step - loss: 10645253652480.0000
Epoch 3/21
60/60 [==============================] - 0s 4ms/step - loss: 10644838416384.0000
Epoch 4/21
60/60 [==============================] - 0s 5ms/step - loss: 10644275331072.0000
Epoch 5/21
60/60 [==============================] - 0s 5ms/step - loss: 10643569639424.0000
Epoch 6/21
60/60 [==============================] - 0s 5ms/step - loss: 10642715049984.0000
Epoch 7/21
60/60 [==============================] - 0s 5ms/step - loss: 10641697931264.0000
Epoch 8/21
60/60 [==============================] - 0s 5ms/step - loss: 10640523526144.0000
Epoch 9/21
60/60 [==============================] - 0s 5ms/step - loss: 10639208611840.0000
Epoch 10/21
60/60 [==============================] - 0s 5ms/step - loss: 10637732216832.0000
Epoch 11/21
60/60 [==============================] - 0s 5ms/step - loss: 10636113215488.0000
Epoch 12/21
60/60 [==============================] - 0s 5ms/step - loss: 10634363142144.0000
Epoch 13/21
60/60 [==============================] - 0s 5ms/step - loss: 10632465219584.0000
Epoch 14/21
60/60 [==============================] - 0s 5ms/step - loss: 10630419447808.0000
Epoch 15/21
60/60 [==============================] - 0s 5ms/step - loss: 10628263575552.0000
Epoch 16/21
60/60 [==============================] - 0s 5ms/step - loss: 10625954611200.0000
Epoch 17/21
60/60 [==============================] - 0s 5ms/step - loss: 10623520866304.0000
Epoch 18/21
60/60 [==============================] - 0s 5ms/step - loss: 10620962340864.0000
Epoch 19/21
60/60 [==============================] - 0s 5ms/step - loss: 10618265403392.0000
Epoch 20/21
60/60 [==============================] - 0s 5ms/step - loss: 10615463608320.0000
Epoch 21/21
60/60 [==============================] - 0s 5ms/step - loss: 10612530741248.0000
Out[47]:
<keras.callbacks.History at 0x15731a48cd0>
In [48]:
data.head()
Out[48]:
size distance floor age facing flat_price flat_rent
0 870 1.5 1 2 1 2300000 9000
1 900 2.4 2 5 1 3000000 12000
2 1050 2.0 2 10 2 5000000 15000
3 950 3.0 3 20 3 1700000 7000
4 750 2.0 1 7 4 2700000 10000
In [60]:
print("Enter House Details to Predict Flat_Price")
b = int(input("Size of the House: "))
c = float(input("Distance: "))
d = int(input("Number of Floors: "))
e = int(input("Age of the buildings: "))
f = int(input("Facing of the house: "))
features = np.array([[ b, c, d, e, f]])
print("Predicted House Price = ", model.predict(features))
Enter House Details to Predict Flat_Price
Size of the House: 680
Distance: 1.5
Number of Floors: 3
Age of the buildings: 0
Facing of the house: 4
1/1 [==============================] - 0s 28ms/step
Predicted House Price =  [[5698.6167]]
In [61]:
from sklearn import svm, datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
In [62]:
data=pd.read_csv("flatrentwithprice.csv")
x=data[["size","distance","floor","age","facing"]]
y=data[["flat_price"]]
In [63]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.4, random_state=42)
clf = LogisticRegression(penalty='l2', C=0.1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
In [64]:
print("Accuracy", metrics.accuracy_score(y_test, y_pred))
Accuracy 0.025
In [65]:
x=data[["size"]]
y=data[["flat_price"]]
In [66]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.4, random_state=44)
In [67]:
clf = LogisticRegression(penalty='l2', C=0.1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy", metrics.accuracy_score(y_test, y_pred))
data.head()
Accuracy 0.025
Out[67]:
size distance floor age facing flat_price flat_rent
0 870 1.5 1 2 1 2300000 9000
1 900 2.4 2 5 1 3000000 12000
2 1050 2.0 2 10 2 5000000 15000
3 950 3.0 3 20 3 1700000 7000
4 750 2.0 1 7 4 2700000 10000
In [57]:
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('flatrentwithprice.csv')
df[["size","flat_price"]].plot(
    xlabel='size',
    ylabel='flat_price',
    title='Size vs Flat_price'
)
plt.show()
In [ ]: